# generate the ideal probability vector based on the interface TFs frequency in the best perturbations

# arguments
args = commandArgs(TRUE)
cutoff=args[1]
best = args[2]

# read best perturbations
pert_file = sprintf("bestcomb%s_perturbations-cutoff%s.txt",best,cutoff)

if(	file.exists(pert_file)) {
	perts = read.table(pert_file,header=TRUE,row.names=1,stringsAsFactors=FALSE)
} else {
	stop("Perturbations file not found")
}

# interface TFs
interfaceTFs = read.table(sprintf("interfaceTFs-cutoff%s.txt",cutoff),sep="\t",quote="",stringsAsFactors=FALSE)[,1]

symb_file = sprintf("singleTF_to_node-cutoff%s.txt",cutoff)
if(file.exists(symb_file)) {
    nrows = as.integer(unlist(strsplit(system(paste("wc -l ",symb_file),intern=TRUE)," +"))[1])
    if (nrows>0){
		symb = read.table(symb_file,stringsAsFactors=FALSE,sep="\t")# mapping of DE interface TFs to nodes
	}
}

# count interface TFs occurrence in best performing combinations
library(foreach)
library(doParallel)
numWorkers = min(c(detectCores() - 1,120))
registerDoParallel(numWorkers)

all_results = foreach(perturbation=rownames(perts), .combine=c, .multicombine=TRUE) %dopar% {
	#separate combination components
	singles = unlist(lapply(perturbation, function(x) strsplit(x,";")[[1]])) #intTF states, e.g. GATA1_0
	p_nodes = unlist(lapply(singles, function(x) strsplit(x,"_")[[1]][1]))	# intTFs, e.g. GATA1
	
	# deal with intTFs that were already in the GRN
	if(exists(deparse(substitute(symb)))) {
		symb_single_TFs = symb[symb[,1] %in% p_nodes ,2] # some TFs are in more than one!
		p_nodes = p_nodes[!p_nodes %in% symb[,1]]
		p_nodes = c(p_nodes,symb_single_TFs)
	}
	p_nodes = paste(p_nodes,unlist(lapply(singles, function(x) strsplit(x,"_")[[1]][2])),sep="_")
	return(p_nodes)
}	

# calculate interface TFs states frequency across best perturbations
freq = table(all_results)/nrow(perts)

# generate the probability vector
activate_idealP = rep(0,length(interfaceTFs))
names(activate_idealP) = paste(interfaceTFs,"1",sep="_")
activate_idealP[names(activate_idealP) %in% names(freq)] = freq[names(activate_idealP)[names(activate_idealP) %in% names(freq)]]

inhibit_idealP = rep(0,length(interfaceTFs))
names(inhibit_idealP) = paste(interfaceTFs,"0",sep="_")
inhibit_idealP[names(inhibit_idealP) %in% names(freq)] = freq[names(inhibit_idealP)[names(inhibit_idealP) %in% names(freq)]]

idealP = c(activate_idealP,inhibit_idealP)
idealP = idealP/sum(idealP) # normalize so that sum gives 1

write.table(cbind(idealP),sprintf("interfaceTFsP-cutoff%s-bestcomb%s.txt",cutoff,best), col.names=FALSE, sep="\t", quote=FALSE)
